************************************************
* Sleep Project - Pedro Bessone, Gautam Rao, Heather Schofield, Frank Schilbach, and Mattie Toma
* Purpose: Replicates Appendix Table 16 (Heterogeneous Treatment Effects for Main Outcomes of Interest)
* Last edited: 07 May 2021
************************************************

clear all
set more off
set seed 1

	do "$s/anderson_index_program.do"
	
	cap reghdfe, compile
	
**************
* Data setup *
**************

	use "$d/heterogeneity_dataset.dta", clear
	
	merge m:1 pid post_treatment using "$d/anderson_indices.dta"
	drop _merge 
	
	* Add baseline naps variable 
	
	preserve 
	clear all 

	use "$d/baseline_cleaned.dta"
	keep pid c27 
	tempfile baseline_naps
	save `baseline_naps'

	restore 

	merge m:1 pid using "`baseline_naps'"
	keep if _merge==3
	drop _merge

	merge 1:1 pid day_in_study using "$d/analysis_base.dta"
	keep if _merge==3
	drop _merge

	
	rename prod mean_prod
	rename typing mean_typing
	rename productivity prod 
	rename typing_time_hr typing
	rename earnings earnings
	
	rename ( sleep_night sleep_eff nap_time_mins c27 above_med_quality) ( ns_duration ns_quality nap_duration num_nap_bas above_median_quality)
	rename (cogindex_pre cogindex_post pref_pre  pref_post) (cognitive_index_base cognitive_index pref_index_base  pref_index)
	
	
	* Generate binary = 1 if the person naps 1 time or more at baseline
	gen nap_bas = 0 
	replace nap_bas = 1 if num_nap_bas!=0 
	
	*Above median age
	gen above_median_age = (age >= 3)
	
	* Fix inconsistencies in treatment assignment 
	egen treat_diff = mean(treat_pool), by (pid)
	replace treat_pool=1 if treat_diff!=0 
	
	
*****************
* WORK OUTCOMES *
*****************

	
	* Standardized vars already defined for earnings, dropping them and re-doing this to be sure 
	cap drop earnings_post_std earnings_pre_std
		
	*Standardize
	foreach var in prod typing earnings{  
		sum `var' if treat_pool == 0 & treat_nap == 0 & post_treatment==1
		gen `var'_post_std = (`var' - r(mean)) / r(sd)

		bysort pid: egen `var'_pre_temp = mean(`var') if post_treatment == 0
		bysort pid: egen `var'_pre = mean(`var'_pre_temp)
		sum `var'_pre if treat_pool == 0 & treat_nap == 0 & post_treatment==0
		gen `var'_pre_std = (`var'_pre - r(mean)) /r(sd)
		drop `var'_pre `var'_pre_temp
	}
	
	foreach var in prod typing earnings{ 

		*baseline
		cap drop _baseline
		cap drop baseline //necessary to divide in two lines because "baseline" may be ambiguous (and therefore drop will not work)
		gen _baseline	= `var' if post_treatment==0
		egen baseline 	= mean(_baseline), by(pid)
		
		*above median baseline
		cap drop above_median_outcome
		sum baseline if tag_pid == 1, d
		gen above_median_outcome = (baseline>=`r(p50)')
			
		local controls "female i.age long_day fraction_high extra_work"
					
		*benchmark regressions, no interaction
		reghdfe `var'_post_std treat_pool treat_nap baseline `controls' if post_treatment==1 & at_present_check==1, vce(cluster pid) absorb(date day_in_study)
		
		local pids_`var' = string(e(N_clust))
		local obs_`var' = string(e(N))
		
		lincom _b[treat_pool]
		local coef_ns_`var'_1 = string(r(estimate),"%3.2f")
		local se_ns_`var'_1 = string(r(se),"%3.2f")
		local p_ns_`var'_1 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

		lincom _b[treat_nap]
		local coef_nap_`var'_1 = string(r(estimate),"%3.2f")
		local se_nap_`var'_1 = string(r(se),"%3.2f")
		local p_nap_`var'_1 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
		*empty locals just to make loop run
		local coef_above_`var'_1 = 0
		local se_above_`var'_1 = 0
		local p_above_`var'_1 = 0
		
		local coef_int_`var'_1 = 0
		local se_int_`var'_1 = 0
		local p_int_`var'_1 = 0
		
		local coef_intnap_`var'_1 = 0
		local se_intnap_`var'_1 = 0
		local p_intnap_`var'_1 = 0
		
		sum `var' if treat_pool == 0 & treat_nap == 0 & post_treatment == 1
		local mean_`var'_1 = string(r(mean), "%3.2f")
		local sd_`var'_1 = string(r(sd), "%3.2f")
		
		
		*interactions
		local i = 1
		foreach int in length eff outcome nap_bas female{
			
			local i = `i' + 1
			
		*regressions
		if "`int'" == "outcome"{
				reghdfe `var'_post_std treat_pool treat_nap `controls' i.above_median_`int' 1.treat_pool#1.above_median_`int' 1.treat_nap#1.above_median_`int'  if post_treatment==1 & at_present_check==1, vce(cluster pid) absorb(date day_in_study)
			}
		else if "`int'" == "nap_bas"{
				reghdfe `var'_post_std treat_pool treat_nap baseline nap_bas `controls' 1.treat_pool#1.nap_bas 1.treat_nap#1.nap_bas if post_treatment==1 & at_present_check==1, vce(cluster pid) absorb(date day_in_study)
			}
		else if "`int'" == "female"{
				reghdfe `var'_post_std treat_pool treat_nap baseline `controls' 1.treat_pool#1.female 1.treat_nap#1.female if post_treatment==1 & at_present_check==1, vce(cluster pid) absorb(date day_in_study)
			}
		else {
				reghdfe `var'_post_std treat_pool treat_nap baseline `controls' i.above_median_`int' 1.treat_pool#1.above_median_`int' 1.treat_nap#1.above_median_`int' if post_treatment==1 & at_present_check==1, vce(cluster pid) absorb(date day_in_study)
			}
			
			*coefficient, se and p-val
			lincom _b[treat_pool]
			local coef_ns_`var'_`i' = string(r(estimate),"%3.2f")
			local se_ns_`var'_`i' = string(r(se),"%3.2f")
			local p_ns_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
			
			lincom _b[treat_nap]
			local coef_nap_`var'_`i' = string(r(estimate),"%3.2f")
			local se_nap_`var'_`i' = string(r(se),"%3.2f")
			local p_nap_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
			
			
			if "`int'" == "female"{
				lincom _b[female]
				local coef_above_`var'_`i' = string(r(estimate),"%3.2f")
				local se_above_`var'_`i' = string(r(se),"%3.2f")
				local p_above_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
				lincom _b[1.treat_pool#1.female]
				local coef_int_`var'_`i' = string(r(estimate),"%3.2f")
				local se_int_`var'_`i' = string(r(se),"%3.2f")
				local p_int_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
				lincom _b[1.treat_nap#1.female]
				local coef_intnap_`var'_`i' = string(r(estimate),"%3.2f")
				local se_intnap_`var'_`i' = string(r(se),"%3.2f")
				local p_intnap_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
			}
			
			else if "`int'" == "nap_bas"{
				lincom _b[nap_bas]
				local coef_above_`var'_`i' = string(r(estimate),"%3.2f")
				local se_above_`var'_`i' = string(r(se),"%3.2f")
				local p_above_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
				lincom _b[1.treat_pool#1.nap_bas]
				local coef_int_`var'_`i' = string(r(estimate),"%3.2f")
				local se_int_`var'_`i' = string(r(se),"%3.2f")
				local p_int_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
				lincom _b[1.treat_nap#1.nap_bas]
				local coef_intnap_`var'_`i' = string(r(estimate),"%3.2f")
				local se_intnap_`var'_`i' = string(r(se),"%3.2f")
				local p_intnap_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
			}
			
			else{
				lincom _b[1.above_median_`int']
				local coef_above_`var'_`i' = string(r(estimate),"%3.2f")
				local se_above_`var'_`i' = string(r(se),"%3.2f")
				local p_above_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
				lincom _b[1.treat_pool#1.above_median_`int']
				local coef_int_`var'_`i' = string(r(estimate),"%3.2f")
				local se_int_`var'_`i' = string(r(se),"%3.2f")
				local p_int_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
				lincom _b[1.treat_nap#1.above_median_`int']
				local coef_intnap_`var'_`i' = string(r(estimate),"%3.2f")
				local se_intnap_`var'_`i' = string(r(se),"%3.2f")
				local p_intnap_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
			}
							
			*control mean and sd
			if "`int'" == "female"{
				sum `var' if treat_pool == 0 & treat_nap == 0 & female == 0 & post_treatment == 1
				local mean_`var'_`i' = string(r(mean), "%3.2f")
				local sd_`var'_`i' = string(r(sd), "%3.2f")
			}
			else if "`int'" == "nap_bas"{
				sum `var' if treat_pool == 0 & treat_nap == 0 & nap_bas == 0 & post_treatment == 1
				local mean_`var'_`i' = string(r(mean), "%3.2f")
				local sd_`var'_`i' = string(r(sd), "%3.2f")
			}
			else {
				sum `var' if treat_pool == 0 & treat_nap == 0 & above_median_`int' == 0 & post_treatment == 1
				local mean_`var'_`i' = string(r(mean), "%3.2f")
				local sd_`var'_`i' = string(r(sd), "%3.2f")
			}
			}
		
		
		* Significance
			foreach i in ns nap above int intnap{
				forval j = 1/6{
					if `p_`i'_`var'_`j'' < 0.01{
						local coef_`i'_`var'_`j' = "`coef_`i'_`var'_`j''***"
					}
					if `p_`i'_`var'_`j'' < 0.05 & `p_`i'_`var'_`j'' >= 0.01{
						local coef_`i'_`var'_`j' = "`coef_`i'_`var'_`j''**"
					}
					if `p_`i'_`var'_`j'' < 0.1 & `p_`i'_`var'_`j'' >= 0.05{
						local coef_`i'_`var'_`j' = "`coef_`i'_`var'_`j''*"
					}
				}
			}
		}
	
	drop baseline
***********************************************
* Heterogeneity for Overall Index * 
***********************************************

collapse overall_post overall_pre (max) *index* above_med* nap_bas female age treat_nap treat_pool, by (pid post_treatment)

rename overall_post overall
rename overall_pre baseline


	foreach var in overall{ 
		
		
		*above median baseline
		cap drop above_median_outcome
		sum baseline, d
		gen above_median_outcome = (baseline>=`r(p50)')
			
		local controls "female i.age"
					
		*benchmark regressions, no interaction
		reghdfe `var' treat_pool treat_nap baseline female if post_treatment==1, cluster (pid) absorb(i.age)
		
		local pids_`var' = string(e(N_clust))
		local obs_`var' = string(e(N))
		
		lincom _b[treat_pool]
		local coef_ns_`var'_1 = string(r(estimate),"%3.2f")
		local se_ns_`var'_1 = string(r(se),"%3.2f")
		local p_ns_`var'_1 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

		lincom _b[treat_nap]
		local coef_nap_`var'_1 = string(r(estimate),"%3.2f")
		local se_nap_`var'_1 = string(r(se),"%3.2f")
		local p_nap_`var'_1 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
		
		lincom _b[baseline]
		local coef_bas_`var'_1 = string(r(estimate),"%3.2f")
		local se_bas_`var'_1 = string(r(se),"%3.2f")
		local p_bas_`var'_1 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
		
		*empty locals just to make loop run
		local coef_above_`var'_1 = 0
		local se_above_`var'_1 = 0
		local p_above_`var'_1 = 0
		
		local coef_int_`var'_1 = 0
		local se_int_`var'_1 = 0
		local p_int_`var'_1 = 0
		
		local coef_intnap_`var'_1 = 0
		local se_intnap_`var'_1 = 0
		local p_intnap_`var'_1 = 0
		
		sum `var' if treat_pool == 0 & treat_nap == 0 & post_treatment == 1
		local mean_`var'_1 = string(r(mean), "%3.2f")
		local sd_`var'_1 = string(r(sd), "%3.2f")
		
		
		*interactions
		local i = 1
		foreach int in length eff outcome nap_bas female age{
			
			local i = `i' + 1
			
			*regressions
			if "`int'" == "outcome"{
				reghdfe `var' treat_pool treat_nap `controls' i.above_median_`int' 1.treat_pool#1.above_median_`int' 1.treat_nap#1.above_median_`int'  if post_treatment==1, vce(cluster pid) noabsorb
			}
			else if "`int'" == "nap_bas"{
				reghdfe `var' treat_pool treat_nap baseline `controls' nap_bas 1.treat_pool#1.nap_bas 1.treat_nap#1.nap_bas if post_treatment==1, vce(cluster pid) noabsorb
			}
			else if "`int'" == "female"{
				reghdfe `var' treat_pool treat_nap baseline `controls' 1.treat_pool#1.female 1.treat_nap#1.female if post_treatment==1, vce(cluster pid) noabsorb
			}
			else if "`int'" == "age"{
				reghdfe `var' treat_pool treat_nap baseline female i.above_median_`int' 1.treat_pool#1.above_median_`int' 1.treat_nap#1.above_median_`int' if post_treatment==1, vce(cluster pid) noabsorb
			}
			else {
				reghdfe `var' treat_pool treat_nap baseline `controls' i.above_median_`int' 1.treat_pool#1.above_median_`int' 1.treat_nap#1.above_median_`int' if post_treatment==1, vce(cluster pid) noabsorb
			}
			
			*coefficient, se and p-val
			lincom _b[treat_pool]
			local coef_ns_`var'_`i' = string(r(estimate),"%3.2f")
			local se_ns_`var'_`i' = string(r(se),"%3.2f")
			local p_ns_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
			
			if "`int'" == "female"{
				lincom _b[female]
				local coef_above_`var'_`i' = string(r(estimate),"%3.2f")
				local se_above_`var'_`i' = string(r(se),"%3.2f")
				local p_above_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
				lincom _b[1.treat_pool#1.female]
				local coef_int_`var'_`i' = string(r(estimate),"%3.2f")
				local se_int_`var'_`i' = string(r(se),"%3.2f")
				local p_int_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
				lincom _b[1.treat_nap#1.female]
				local coef_intnap_`var'_`i' = string(r(estimate),"%3.2f")
				local se_intnap_`var'_`i' = string(r(se),"%3.2f")
				local p_intnap_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
			}
			
				else if "`int'" == "nap_bas"{
				
				lincom _b[nap_bas]
				local coef_above_`var'_`i' = string(r(estimate),"%3.2f")
				local se_above_`var'_`i' = string(r(se),"%3.2f")
				local p_above_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
				lincom _b[1.treat_pool#1.nap_bas]
				local coef_int_`var'_`i' = string(r(estimate),"%3.2f")
				local se_int_`var'_`i' = string(r(se),"%3.2f")
				local p_int_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
				lincom _b[1.treat_nap#1.nap_bas]
				local coef_intnap_`var'_`i' = string(r(estimate),"%3.2f")
				local se_intnap_`var'_`i' = string(r(se),"%3.2f")
				local p_intnap_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
			}
			
			else{
				lincom _b[1.above_median_`int']
				local coef_above_`var'_`i' = string(r(estimate),"%3.2f")
				local se_above_`var'_`i' = string(r(se),"%3.2f")
				local p_above_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
				lincom _b[1.treat_pool#1.above_median_`int']
				local coef_int_`var'_`i' = string(r(estimate),"%3.2f")
				local se_int_`var'_`i' = string(r(se),"%3.2f")
				local p_int_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
				lincom _b[1.treat_nap#1.above_median_`int']
				local coef_intnap_`var'_`i' = string(r(estimate),"%3.2f")
				local se_intnap_`var'_`i' = string(r(se),"%3.2f")
				local p_intnap_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
			}
				
			lincom _b[treat_nap]
			local coef_nap_`var'_`i' = string(r(estimate),"%3.2f")
			local se_nap_`var'_`i' = string(r(se),"%3.2f")
			local p_nap_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
			
			if "`int'" == "outcome"{
				*lincom _b[baseline]
				*just to make it appear on the loop
				local coef_bas_`var'_`i' = 0
				local se_bas_`var'_`i' = 0
				local p_bas_`var'_`i' = 0
			}
			else {
				lincom _b[baseline]
				local coef_bas_`var'_`i' = string(r(estimate),"%3.2f")
				local se_bas_`var'_`i' = string(r(se),"%3.2f")
				local p_bas_`var'_`i' = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
			}
			
			*control mean and sd
			if "`int'" == "female"{
				sum `var' if treat_pool == 0 & treat_nap == 0 & female == 0 & post_treatment == 1
				local mean_`var'_`i' = string(r(mean), "%3.2f")
				local sd_`var'_`i' = string(r(sd), "%3.2f")
			}
			
			else if "`int'" == "nap_bas"{
				sum `var' if treat_pool == 0 & treat_nap == 0 & nap_bas == 0 & post_treatment == 1
				local mean_`var'_`i' = string(r(mean), "%3.2f")
				local sd_`var'_`i' = string(r(sd), "%3.2f")
			}
			
			else {
				sum `var' if treat_pool == 0 & treat_nap == 0 & above_median_`int' == 0 & post_treatment == 1
				local mean_`var'_`i' = string(r(mean), "%3.2f")
				local sd_`var'_`i' = string(r(sd), "%3.2f")
			}
		}
		
		* Significance
			foreach i in ns above int nap bas intnap{
				forval j = 1/7{
					if `p_`i'_`var'_`j'' < 0.01{
						local coef_`i'_`var'_`j' = "`coef_`i'_`var'_`j''***"
					}
					if `p_`i'_`var'_`j'' < 0.05 & `p_`i'_`var'_`j'' >= 0.01{
						local coef_`i'_`var'_`j' = "`coef_`i'_`var'_`j''**"
					}
					if `p_`i'_`var'_`j'' < 0.1 & `p_`i'_`var'_`j'' >= 0.05{
						local coef_`i'_`var'_`j' = "`coef_`i'_`var'_`j''*"
					}
				}
			}
		}
			

******************************************
*  Heterogeneity Table for Overall Index *
******************************************

cd "$oa/Tables"
	
	file open f using "TableA16_main_overall_index_hte.tex", write replace
	file write f "\begin{tabular}{@{}c@{}}" _n ///
	"\begin{tabular}{l*{7}{c}}" _n ///
	"\toprule" _n ///
	"& \multicolumn{7}{c}{\textbf{Overall Index}}\\" _n ///
	///
	"\cmidrule(lr){2-8} & & \makecell{X=Sleep \\ Duration} & \makecell{X=Sleep \\ Efficiency} & \makecell{X=Baseline \\ Outcome} & \makecell{X=Baseline \\ Naps} & \makecell{X=Female} & \makecell{X=Age} \\" _n ///
	///
	"& (1) & (2) & (3) & (4) & (5) & (6) & (7) \\" _n ///
	///
	"\midrule" _n ///
	/// 
	"Night-Sleep Treatments & `coef_ns_overall_1' & `coef_ns_overall_2' & `coef_ns_overall_3' & `coef_ns_overall_4' & `coef_ns_overall_5' & `coef_ns_overall_6' & `coef_ns_overall_7'\\" _n /// 
	"& (`se_ns_overall_1') & (`se_ns_overall_2') & (`se_ns_overall_3') & (`se_ns_overall_4') & (`se_ns_overall_5') & (`se_ns_overall_6') & (`se_ns_overall_7')\\" _n /// 
	"\addlinespace" _n ///
	///
	"X & & `coef_above_overall_2' & `coef_above_overall_3' & `coef_above_overall_4' & `coef_above_overall_5' & `coef_above_overall_6' & `coef_above_overall_7'\\" _n /// 
	"& & (`se_above_overall_2') & (`se_above_overall_3') & (`se_above_overall_4') & (`se_above_overall_5') & (`se_above_overall_6') & (`se_above_overall_7')\\" _n /// 
	"\addlinespace" _n ///
	///
	"Night-Sleep Treatments*X & & `coef_int_overall_2' & `coef_int_overall_3' & `coef_int_overall_4' & `coef_int_overall_5' & `coef_int_overall_6' & `coef_int_overall_7'\\" _n /// 
	"& & (`se_int_overall_2') & (`se_int_overall_3') & (`se_int_overall_4') & (`se_int_overall_5') & (`se_int_overall_6') & (`se_int_overall_7')\\" _n /// 
	"\addlinespace" _n ///
	///
	"Nap Treatment & `coef_nap_overall_1' & `coef_nap_overall_2' & `coef_nap_overall_3' & `coef_nap_overall_4' & `coef_nap_overall_5' & `coef_nap_overall_6' & `coef_nap_overall_7'\\" _n /// 
	"& (`se_nap_overall_1') & (`se_nap_overall_2') & (`se_nap_overall_3') & (`se_nap_overall_4') & (`se_nap_overall_5') & (`se_nap_overall_6') & (`se_nap_overall_7')\\" _n /// 
	"\addlinespace" _n ///
	///
	"Nap Treatment*X & & `coef_intnap_overall_2' & `coef_intnap_overall_3' & `coef_intnap_overall_4' & `coef_intnap_overall_5' & `coef_intnap_overall_6' & `coef_intnap_overall_7'\\" _n /// 
	"& & (`se_intnap_overall_2') & (`se_intnap_overall_3') & (`se_intnap_overall_4') & (`se_intnap_overall_5') & (`se_intnap_overall_6') & (`se_intnap_overall_7')\\" _n /// 
	"\addlinespace" _n ///
	///
	"\midrule" _n ///
	" Participants & `pids_overall' & `pids_overall' & `pids_overall' & `pids_overall' & `pids_overall' & `pids_overall' & `pids_overall'\\"  _n ///
	///	
	"\bottomrule" _n ///
	"\end{tabular}" _n ///
	"\end{tabular}" _n
	file close f			
							
